import copy

# Default network hyperparameters. A deep copy of this dict is stored under
# full_args["network"].
network_args = {
    # available networks: mlp, conv, pair, flatpair, keypair, multiattn,
    # maskattn, parattn, inexp, multimlp, gpn, gnn
    "net_type": "mlp",
    "keypair": {
        "net_type": None,  # e.g. "gin"
        "num_layer": 5,
        "JK": "last",
        "embed_dim": 512,
        "drop_ratio": 0.5,
    },
    "use_layer_norm": False,  # applies layer norm at every level
    # The hidden layers. If empty, the network is USUALLY connected directly
    # to the outputs.
    "hidden_sizes": [],
    "init_form": "",  # initialization strategy
    # available activations: relu, leakyrelu, sin, sinc, sigmoid, tanh,
    # softmax, cos, none, prelu, crelu
    "activation": "leakyrelu",
    "activation_final": "none",  # same options as "activation", but only for the last layer
    "scale_logits": -1,  # scales the outputs by this value; negative means unused
    "scale_final": 1,  # scales the final layer in MLPs
    "use_bias": True,  # uses a bias in MLPs and 1D conv
    "dropout": 0.0,  # applies dropout at every layer unless the network specifies otherwise
    # embeds the inputs; used as the embed_dim in transformers, keypair and
    # pair networks
    "embed_dim": 0,
    "use_valid": True,  # if False, ignores the valid vector
    "dist": {
        "base_variance": 0.01,  # the minimum variance added to the output of a distribution
    },
    "inter_net": {
        # shares the encoding with the interaction model and forward model
        # TODO: not implemented
        "shared_encoding": False,
    },
    "multi": {
        "use_population": False,  # keeps a collection of networks
        "num_masks": -1,  # should be set outside of network_args
        "embedding_sizes": [],  # number and shape of hidden layers
        "embedding_output": 256,
        "use_embedding": True,
    },
    "factor_net": {
        "drop_first": False,
        "reduce_function": "max",  # available reduction methods: sum, prod, max, mean, cat, none
        "post_dim": -1,  # inputs that are added later have this dimension
        "difference_first": False,  # takes the differential of the inputs
        "final_layers": [],  # layers after the convolution component
        "embed_layers": [],  # the layers for the embedding network
        "num_pair_layers": 1,  # if multilayered, repeatedly applies the same network architecture n times
        "repeat_layers": False,  # repeats the same network if True, otherwise uses a new network for each layer
        "preencode": False,  # encode prior to entering the pair network if True
        "pre_dropout": 0.0,  # dropout only prior to the embedding
        "no_decode": True,  # no decoding layer in keypair
        "append_keys": True,  # appends the keys to the queries
        # appends 0 in the shape of the keys to the queries. This is set
        # automatically in (TODO: only) pairnets by append_keys = False and
        # num_pair_layers > 1
        "append_zero_keys": False,
        "append_mask": False,  # appends the mask to the keys
        "append_broadcast_mask": 0,  # appends a mask broadcast to this length of the mask
    },
    "embedpair": {
        # indicates if the network needs a new embedding, or the inputs are
        # already embedded
        "new_embedding": True,
        "query_aggregate": True,  # in keypair networks, aggregates the queries (for prediction)
        "preembed_dim": 64,  # the embedding size for the internal pair network
    },
    "cluster": {
        "use_cluster": False,
        "inter_pair_layers": 1,
        # overloaded: for expert models, this is the number of clusters. For
        # interaction selection models, this is the number of interaction masks
        "num_clusters": 0,
        "cluster_inter_hidden": [],
    },
    "comb_embed": {
        "max_hash": -1,  # TODO: not implemented
    },
    "mask_attn": {
        "model_dim": 0,  # the dimension of values
        "num_heads": 0,  # the number of heads
        # the number of layers for an attention model
        # TODO: use factor_net.num_pair_layers?
        "num_layers": 1,
        "attention_layer_norm": True,
        "attention_dropout": 0.0,  # for dropout only at the attention level
        "mask_mode": "attn",  # where the mask is applied, either 'query' or 'attn'
        "gumbel_attention": -1.0,  # if a gumbel softmax is used at the attention level
        "bernoulli_weights": False,  # if a bernoulli distribution is used instead of softmax
        "needs_encoding": True,  # should be set in init; this is the default value
        "merge_function": "cat",  # the function for merging together the heads
        "no_hidden": False,  # adds no hidden layers to the key, query or value operations
        "renormalize": False,  # normalizes the attention weights after masking and valid
    },
    "input_expand": {
        "include_relative": False,
        "pre_embed": [],
        "first_include": 0,  # set in init; this is the default initialization
    },
    "optimizer": {
        "lr": 1e-4,  # learning rate
        "alt_lr": 1e-5,  # learning rate for joint optimization
        "eps": 1e-5,
        "alpha": 0.99,
        "betas": [0.9, 0.999],
        "weight_decay": 0.0,  # network weight decay
    },
}


# Default values for the full argument tree. The "network" entry at the end
# holds a deep copy of network_args.
full_args = {
    "name_id": "",  # the identifier for the run; empty uses run_TIME
    "arg_dict": "full",
    "debug": {
        "run_test": "",
        "collect_mode": "",
    },
    "hyperparam": {
        "name_orders": [],
    },
    "record": {
        "record_rollouts": "",
        "record_graphs": "",  # records the tensorboard graphs for logging
        "record_recycle": -1,
        "save_dir": "",
        "load_dir": "",
        "load_checkpoint": "",
        "checkpoint_dir": "",
        "pretrain_dir": "",
        # saves in record_rollouts, saves the whole action chain and the param
        "save_action": False,
        "save_interval": 0,
        "log_filename": "",
        "wandb_log": "",
        "wandb_run": "",
        "refresh": False,
        "presave_graph": False,
        "load_intermediate": "",  # loads an intermediate value, for faster debugging
        "save_intermediate": "",  # saves to this location
        "load_trace": False,  # loads the traces from data rather than regenerating them
    },
    "ROC": {
        "model_paths": [],  # for loading multiple models (for ROC)
        "labels": [],  # a list of string labels (for ROC)
        "generate_plot": False,  # toggles the plot generation mode
    },
    "environment": {
        "env": None,
        "render": False,
        "frameskip": 1,
        "variant": "default",
        "horizon": -1,
        "seed": -1,
        "demonstrate": False,
        "load_environment": "",
        "fixed_limits": False,
        # wraps a gym environment with the gymnasium wrapper (adds an output
        # to step and info to reset)
        "gym_to_gymnasium": True,
        "flat_obs": False,  # flatten the observations instead of sending back a dict
        "append_id": False,  # appends the identity of the object to the state
        # puts the environment in a debugging mode where attributes are easier
        # to identify. Implemented: RandomDAG
        "debug_mode": False,
        # renders masks of the objects; render should also be True for this to work
        "render_masks": False,
    },
    "torch": {
        "gpu": 1,
        "cuda": True,
        "torch_seed": -1,
    },
    # NEW ARGS
    "state": {
        "append_id": True,  # appends the identifier to the observation
        "key_append_id": False,  # appends the identifier to the value used for prediction
        # how close is close enough for proximity
        # TODO: make this relative
        "proximity_epsilon": -1,
    },
    "factor": {},
    "masking": {
        "selection_mask": False,
        "weighting": [-1, -1],  # weighting_ratio, weighting schedule
        "low_weighting": [-1, -1],  # weighting_ratio, weighting schedule for low interaction weights
        # the maximum number of train steps per forward step, the minimum
        # number, and the rate at which it doubles
        "inline_iters": [1, 1, -1],
        "entropy_weight": [0, -1],  # entropy weight value, schedule to halve weight
        "oneloss": [0, -1],  # rewards ones weight value, schedule to halve weight
        "halfloss": [0, -1],  # rewards 50% random value, schedule to halve weight
        "lasso": [0, -1],  # lasso value (not used with adaptive values), schedule to halve weight
        # statistic to use to decide how good the forward model is doing:
        # likelihood, mean, meanvar
        "adaptive_lasso_type": "likelihood",
        # adaptive lasso value (-1 first value means unused), schedule to halve weight
        "adaptive_lasso": [-1, -1],
        # bias to reduce the steepness of adaptive lasso penalizing values,
        # subtracts from the total; second term flattens the adaptive lasso
        "adaptive_lasso_bias": [0, 1],
        "lasso_order": 1,  # p-norm p, 1 by default to encourage sparsity
    },
    "active": {
        "use_cluster": False,  # uses a cluster based model for the active model
        "use_population": False,  # uses a population of models
        "delay_inter_train": 0,  # delays training the mask this number of steps
        # every interaction_schedule timesteps, the weight of full vs masked
        # forward modeling is reduced by half
        "interaction_schedule": 1.0,
        "weighting": [-1, -1],  # weighting_ratio, weighting schedule
        "full_weighting": [-1, -1],  # weighting_ratio, weighting schedule
        "given_weighting": False,  # uses weights computed in buffer.weights
        # soft masking value, where zeros are replaced with the first value,
        # with update schedule defined by second term
        "soft_masking": [0.0, -1.0],
        "resetting": {
            # how often (in iters) to reset inter, bias for resetting inter,
            # when to stop resetting inter (-1 for no resetting)
            "reset_inter": [1000, 0, -1],
            # how often (in iters) to reset forward, bias for resetting forward,
            # when to stop resetting forward (-1 for no resetting)
            "reset_active": [1000, 500, -1],
            "reset_form": "xnorm",  # same init forms as used in network_args.init_form
            "reset_layers": 5,  # number of layers to reset
            # interaction net adaptive resetting based on var less than for
            # log_prob_reset, inter_reset, inter_val_reset
            "adaptive_inter_reset": [100, 100, 100, 0],
            # active net adaptive resetting based on var less than for
            # log_prob_reset, inter_reset, inter_val_reset
            "adaptive_active_reset": [100, 100, 100, 0],
            "running_maxlen": 10,  # the amount of iterations to keep track of for low variance
        },
        "random_masks": {
            # rate at which to sample random masks for random mask training,
            # schedule to decay random mask rate
            "random_mask_schedule": [-1, -1],
            # the bias and value for adaptive random masking
            # TODO: not implemented
            "adaptive_random_masking": [-1, 0],
            "safe_random_type": "",  # only allows the following to be masked. possible: nonproximal
        },
        "full_steps": 1,  # number of steps of full training per iteration
        "active_steps": 1,  # number of masked training per iteration
        "trace_steps": 1,  # number of steps for binary fitting
        "mask_inter_steps": 1,  # number of steps for interaction fitting
        "trace": {
            "soft_val": 0.0,  # regresses to 1-soft_val instead of 1
            "use_trace_weights": False,  # use computed weights for the trace
        },
        "include_gradient": False,  # includes input gradients in the output values
        "min_mixing": 0.005,  # minimum amount of mixing the full model with the masked model
    },
    "passive": {
        "steps": 1,  # number of iterations to train the passive model per train loop
        "weighting": [-1, 0],  # weighting_ratio, weighting schedule
        "include_gradient": False,  # includes input gradients in the output values
    },
    "train": {
        "train_edge": [],
        "dummy": "",
        "train": False,
        "num_frames": 0,  # number of frames to train on
        "load_rollouts": "",  # where to load the data from (folder)
        "load_filename": "object_dumps.txt",  # file from which to load data
        # loads the encodings preconstructed by a trained encoder (ex. a vae)
        "load_encodings": "",
        "train_test_ratio": 0.9,  # r * 100 % will be train, the rest will be test
        "train_test_order": "random",  # possible ordering of the data: random, ordered
        "num_iters": 0,  # number of primary iterations
        "pretrain_frames": 0,  # number of pretraining interactions
        "batch_size": 128,
        # if there is a sub-iteration number, such as the number of steps per
        # iteration, this is the value
        "num_steps": 0,
        "param_update_frequency": -1,  # updates the parameter values every n iterations
        "log_interval": 2000,  # how many train iterations for printing a log output
        # how frequently to log _within_ a subtraining step (such as the
        # expectation step of an EM algorithm)
        "intermediate_log_interval": -1,
        "loss_type": "mean",  # uses the mean loss, options: mean, expectile
        "expectile": {
            # either the difference from the estimated highest log likelihood,
            # or this value
            "expt_threshold": 0,
            "estimate_threshold": False,  # if True, modifies the threshold
            "expt_ord": 2,  # 1 uses quantile, 2 uses expectile
            "expt_tau": 0.5,  # the ratio rate for the expectile, if -1 then expectile is not used
        },
    },
    "pretrain": {
        "num_iters": 0,  # number of pretraining iterations
        "weighting_type": "uni",  # weighting scheme for pretraining
        "pretrain_log_interval": 1000,  # frequency of logging during pretraining
        "pretrain_full_steps": 1,  # number of steps of full training per iteration
        "pretrain_active_steps": 1,  # number of masked training per iteration
        "pretrain_trace_steps": 1,  # number of trace training per iteration
        "pretrain_infer_interval": 0,  # how often inference is run
    },
    "inter": {
        "passive_weighting": [0, 0],  # passive_error_cutoff, passive_error_upper
        "use_active_as_passive": False,  # uses the active model as the passive model
        # use the all model (many to many) to replace the full model (many to one)
        "use_all_as_single": False,
        # use the full model (many to one) to replace the pair model (two to one)
        "use_full_as_pair": False,
        # list of names of training options: passive, single_passive, pair,
        # full, mask, binaries, all_full, all_mask, rand_mask, rand_all_mask,
        # cluster_active, cluster_inter, em, all_em, null_em, all_null_em
        "train_forms": [],
        "pretrain_forms": [],  # list of names of pretraining options: same as train_forms
        "pair_names": [],  # the pair to train a model for, of format: P1|P2|...->Target
        "train_names": [],  # for full training, the names of variables to train prediction models for
        "passive_net_type": "multimlp",  # network type for the passive model
        "masking": {  # TODO: move this out of inter into the masking part
            # the masking form: soft (gumbel), hard, mixed (bernoulli * weight),
            # flat (fixed threshold)
            "masking_form": "mixed",
            # the kind of mixing used if masking_form == mixed.
            # Options: weighting, relaxed, mixed, hard
            "mixed_interaction": "mixed",
            # distribution temperature for relaxed distributions on the interaction mask
            "dist_temperature": 1,
            # distribution temperature for relaxed distributions on the selection network
            "selection_temperature": 1,
            # 1-s cannot be higher confidence than this probability in interaction training
            "cap_probability": [1e-5, 1e-5],
            # computes other values for masking though not used for the main compute
            "additional_mask_forms": [],
        },
        # predicts the difference between the current and next, rather than
        # the next state
        "predict_dynamics": False,
        # reassigns the current state with the passive state and removes the
        # passive variable
        "passive_reassign": False,
        "predict_next_state": True,  # predicts the next state, as opposed to the name in the current
        "weighting_type": "uni",  # the form of weights, supports: passive_error, trace, uni
        "weight_proximity": False,  # uses proximity in passive_error weighting scheme
        "null_em": {
            # the way the weights are computed. Available options: likelihood, uni
            "null_bin_weight_type": "likelihood",
            # the thresholds for the null bin weights, and clip value. For
            # likelihood: null_log_prob upper bound, log_prob lower bound,
            # difference (divide), clip value
            "bin_weight": [0, 0, 1, 5],
        },
        "regularization": {
            "expt_passive": False,  # uses the expectile loss with the passive model
            "attention": {
                # amount to regularize the attention head entropy (needs to be
                # large, like 100), -1 unused
                "attn_reg_lambda": -1,
            },
            "splitting": {
                "splitting_type": "ones",  # either ones, zeros, both, all
                # how values are reassigned, either counterfactually or with
                # null, random, perturb
                "reassignment_type": "null",
                "splitting_bias": [0.0, 0.0],  # the amount of bias to move the sigmoid for zeros and ones
                "num_counterfactual": 1,  # the number of counterfactual alternatives tested
                # the distributional distance used, alternatively:
                # target_likelihood, likelihood, mean, was1, was2
                "distance_form": "likelihood",
                "perturbation_magnitude": 0.2,  # the amount of perturbation to generate a counterfactual
                # the lambda value to scale the splitting (zeros, ones),
                # defaults to ones if all
                "splitting_lambda": [-1, 0],
                # the bias value and the div value, uses the splitting lambda,
                # -1 for ones means unused
                "adaptive_splitting": [-1, 1],
                # statistic to use to decide how good the forward model is
                # doing: likelihood, mean, meanvar
                "adaptive_splitting_type": "likelihood",
                "splitting_passive": False,  # uses the splitting loss with the passive training
            },
            "embedding": {
                # regularizing the embeddings (the state immediately prior to masking)
                "embed_reg_lambda": -1,
                # the bias value and the div value, uses the embedding
                # regularization lambda, -1 means unused
                "adaptive_embed_reg": [-1, 1],
                # statistic to use to decide how good the forward model is
                # doing: likelihood, mean, meanvar
                "adaptive_embed_reg_type": "likelihood",
                # regularizing the embeddings after applying the interaction mask
                "mask_embed_reg_lambda": -1,
            },
            "null_consistency": {
                # penalizes nonzero embeddings for values that should be like nulls
                "null_reg_lambda": -1,
                # where to get the value from (flat, base, null, single_passive),
                # then distance form: likelihood, mean, meanvar
                "null_embed_reg_type": ["flat", "likelihood"],
                "null_adaptive": [1, 1],  # bias (subtracted from likelihood) and flatten factor
            },
        },
    },
    "infer": {
        "infer_num": 1024,  # number of states to run inference on
        # list of kinds of inference to check: soft, hard, mixed, nulls,
        # gradient, counterfactual, attention
        "infer_types": ["soft"],
        # frequency of train steps that inference is called (0 or less for no infer)
        "infer_interval": 0,
        # list of names to perform inference on. Otherwise, infers on all variables
        "infer_names": [],
        # runs inference on the whole dataset after train loop, usually for evaluation
        "infer_dataset": False,
        # active must be greater than, passive must be less than,
        # active - passive must be greater than
        "granger_threshold": [0.0, 0.0, 1.0],
        # uses the training weights (named) for inference
        "train_weight_infer": "sample_active_full_weights",
        # uses weights for evaluation, the only option is "trace_weights"
        # TODO: add passive_error
        "eval_weight_infer": "",
        # NOTE(review): the original comment duplicated eval_weight_infer's;
        # presumably a scale applied to the evaluation weights - confirm
        "eval_weight_lambda": 1,
        # if nonempty, renders the high trace error states of the infer type
        # for visualization
        "render_eval": [],
        # NOTE(review): the original comment duplicated render_eval's;
        # presumably the threshold used when selecting states to render - confirm
        "render_threshold": 0.3,
        # generates per-state error analysis info, if available, for the given index
        "perform_analysis": -1,
        "attention": {
            # selects an attention head weight threshold as the midpoint between
            # the average one logit and the average zero logit
            "select_ideal": False,
            # the magnitude of the average head weights above which a cause
            # label is given
            "attention_threshold": 0.5,
        },
        "counterfactual": {
            # how values are reassigned, either counterfactually or with null,
            # random, perturb
            "reassignment_type": "random",
            "num_counterfactual": 5,  # the number of counterfactual alternatives tested
            # the distributional distance used, alternatively: wasserstein,
            # mean l2/l1
            # NOTE(review): the list "target_likelihood, likelihood, mean, was1,
            # was2" was attached to perturbation_magnitude in the original and
            # presumably belongs here - confirm
            "distance_form": "likelihood",
            "perturbation_magnitude": 0.1,  # the amount of perturbation to generate a counterfactual
            # selects a counterfactual threshold as the midpoint between the
            # average one logit and the average zero logit
            "select_ideal": False,
            # the magnitude of the likelihood difference deemed sufficient for
            # labeling a cause
            "counterfactual_threshold": 1,
        },
        "gradient": {
            # the magnitude of the gradient deemed sufficient for labeling a cause
            "gradient_threshold": 1,
            # selects a gradient threshold as the midpoint between the average
            # one logit and the average zero logit
            "select_ideal": False,
        },
        # what kind of masking to do inference with for pretraining, either
        # full, mask, all_full, all_mask
        "pretrain_mask_mode": "full",
        # what kind of masking to do inference with
        # NOTE(review): the original comment said "for pretraining", presumably
        # copied from pretrain_mask_mode; likely applies to training - confirm
        "train_mask_mode": "mask",
        "nulls": {
            # the maximum size of the combinations to null at once; this can
            # get expensive at high numbers
            "max_combination": 1,
            "use_vals": "dist",  # dist or weights, used as indicators for null effect
            # what kind of distance is used; this uses the difference in the
            # likelihood of the actual outcome
            "distance_form": "likelihood",
            "dist_epsilon": 1.0,  # the distance used to indicate a change
            "weight_form": "",  # name of the kind of weight returned, attention or otherwise
            # difference between mask weights for null and non-null distribution
            "weight_epsilon": 0.1,
            # nulls out state elements with low passive error, null is random.
            # Options: "zero" (zero out all values), "rand" (randomly zero out
            # according to pseudo_null_rate), "" (do not use)
            "pseudo_null": "",
            "pseudo_null_passive_weighting": [-1, -1, -1],  # passive error to use for pseudo null
            "pseudo_null_rate": 0.5,  # randomly nulls out values according to null rate
        },
    },
    "image_enc": {
        "encoding_dim": 10,  # dimension of the image encodings
    },

    # ------------------------------------------------------------------
    # Commented-out option groups carried over from the original file.
    # They are inactive and are kept here only for reference.
    # ------------------------------------------------------------------
    # "load_intermediate": "",
    # "save_intermediate": "",
    # "interaction_testing": [],
    # "proximity_epsilon": -1,
    # "compare_trace": False,
    # "passive": {
    #     "train_passive": True, # trains the passive model (might only want to train the active model)
    #     "load_passive": "",
    #     "passive_iters": 0,
    #     "passive_log_interval": 1000,
    #     "pretrain_active": False,
    # },
    # "interaction": {
    #     "interaction_pretrain": 0,
    #     "subset_training": 0,
    #     "soft_train": 0, # adds this constant to the trace values then renormalizes
    # },
    # "active": {
    #     "active_steps": 1,
    #     "no_interaction": 0,
    #     "weighting": [0,0,-1,0], # must be length 4
    #     "active_log_interval": 100,
    #     "log_gradients": False,
    #     "interaction_schedule": -1.0, # if negative, uses 0.5 fixed tradeoff, if 0<= is <=1 uses the value at fixed value, if > 1 uses exp(-i/is)
    #     "inline_iters": [5, 1, 1000],
    #     "interaction_weighting": [0,0], # must be length 2
    #     "intrain_passive": 0,
    #     "error_binary_upweight": 1,
    #     "adaptive_inter_lambda": -1.0, # adaptive weight for training the forward model with full or inter inputs, still uses interaction schedule for tradeoff
    #     "log_timestamps": True,
    #     "train_true": False, # trains using the true trace values
    # },
    # "full_inter": {
    #     "object_id": True, # appends a 1 hot identifier of the object class to the object
    #     "lasso_lambda": [1, 0, 0, -1, -1], # lasso_lambda, open mask forcing, 0.5 mask forcing, one mask schedule, masking schedule
    #     "lasso_order": 1,
    #     "adaptive_lasso": [-1.0, -1.0], # adapts the lasso value according to the magnitude of the active interaction loss (multiplied by this hyperparameter), flattens the decay rate (exp(-\|perf diff\| / adaptive[1]))
    #     "adaptive_lasso_bias": [0.0, -1.0], # biases the adaptive lasso baseline constant by negative the adaptive bias, decayed at the schedule
    #     "adaptive_lasso_type": "likelihood", # different ways of computing adaptive lasso, uses: likelihood, l2 mean, l1 mean and variance
    #     "reset_caloss": False, # resets the converged active loss after passive training
    #     "dual_lasso": [0,0],
    #     "entropy_lambda": [0,0], # penalizes the individual values of the binary mask for having high entropy (close to 0.5)
    #     "use_active_as_passive": False,
    #     "proximal_weights": False,
    #     "reconstruct_embedding": False, # trains the factor-specific embeddings with reconstruction, only implemented for linpair net
    #     "log_gradients": False,
    #     "train_full_only": False,
    #     "lightweight_passive": True,
    #     "train_names": [], # for debugging, only trains certain names
    #     "load_forward_only": "", # loads only the forward models
    #     "selection_mask": False, # uses a selection mask network
    #     "selection_train": "",
    #     "nextstate_interaction": False, # uses the outcome for the interaction network
    #     "predict_next_state": True, # predicts the next state, otherwise, predicts the current state (useful for DAG methods)
    #     "delay_inter_train": -1, # delays starting interaction training for this number of batches
    #     "partial_active_reset": [-1,-1,-1], # the number of layers to reset, the frequency of interactions (num iters), the iteration to stop resetting at
    #     "partial_inter_reset": [-1,-1,-1],
    # },
    # "EMFAC": {
    #     "full_train": "",
    #     "num_masks": 1,
    #     "is_EMFAC": False,
    #     "E_step_iters": 1, # steps of forward model training
    #     "M_step_iters": 1, # steps of interaction model training
    #     "refine_iters": 1,
    #     "binary_cost": 1,
    #     "model_mask_weights": [0,0,0.4], # weight for the forward model performance, weight for the mask magnitude, weight lambda regularization
    #     "weight_forward": False, # Weights the losses by the sampling weights, TODO: might need more options
    #     "train_reconstruction": False # trains the embedding to perform reconstruction
    # },
    # "multi_inter": {
    #     "evaluate": False, # evaluation mode, where a trained model is tested for how well it can predict actual cause using null assumption, if 0, no evaluation, if 1, only evaluation, if 2, both train and eval
    #     "max_combination": 1, # searches for up to this many different parent sets (num factors combinations max_combination) when looking for simultaneous interactions
    #     "dist_epsilon": 1e-1, # the minimum closeness of the distributions to be considered a null component
    # },
    # "mask": {
    #     "min_sample_difference": 1,
    #     "var_cutoff": [0.1],
    #     "num_samples": 30,
    #     "sample_grid": True,
    #     "dynamics_difference": False,
    # },
    # "sample": { # TODO NEW
    #     "sample_type": "uni",
    #     "sample_distance": -1,
    #     "sample_schedule": -1,
    #     "sample_raw": False,
    #     "sample_parent": False,
    #     "param_recycle": -1,
    # },
    # "extract": {
    #     "single_obs_setting": [0, 0, 0, 0, 0, 0],
    #     "relative_obs_setting": [0, 0, 0, 0, 0],
    #     "combine_param_mask": True
    # },
    # "option": { # mostly terminate and reward parameters
    #     "term_form": "param",
    #     "term_as_done": False,
    #     "use_binary": False,
    #     "true_done": True,
    #     "trunc_true": False,
    #     "epsilon_close": [-1.0],
    #     "param_norm": 1.0,
    #     "constant_lambda": 0.0,
    #     "true_lambda": 0.0,
    #     "param_lambda": -1.0,
    #     "inter_lambda": -1.0,
    #     "negative_true": -1.0,
    #     "interaction_as_termination": False,
    #     "temporal_extend": -1,
    #     "time_cutoff": 0,
    #     "between_terminate": 1
    # },
    # "action": {
    #     "use_relative_action": False,
    #     "relative_action_ratio": -1,
    #     "min_active_size": 10,
    #     "discrete_params": False,
    #     "round_values": False,
    # },
    # "collect": {
    #     "buffer_len": 100000,
    #     "prioritized_replay": list(),
    #     "test_episode": True,
    #     "max_steps": 1000,
    #     "terminate_reset": False,
    #     "display_frame": 0,
    #     "save_display": "",
    #     "stream_print_file": "",
    #     "demonstrate_option": False,
    #     "aggregator": {
    #         "sum_rewards": True,
    #         "only_termination": False,
    #     },
    #     "time_check": False, # returns a miss only if timing check fails
    #     "omit_done": False, # omits dones (EOE) from the buffer entirely
    # },
    # "policy": {
    #     "learning_type": "dqn",
    #     "epsilon_random": 0.0,
    #     "epsilon_schedule": -1,
    #     "rainbow": {
    #         "num_atoms": 51,
    #         "is_dueling": True,
    #         "is_noisy": True,
    #     },
    #     "ground_truth": "",
    #     "learn": {
    #         "post_random_iters": 0,
    #         "grad_epoch": 10,
    #         "sample_form": "merged",
    #     },
    #     "primacy": {
    #         "reset_layers": -1,
    #         "reset_frequency": -1,
    #         "primacy_iters": -1,
    #         "stop_resets": -1
    #     },
    #     "discount_factor": 0.99,
    #     "lookahead": 2,
    #     "max_min_critic": [-1.0,-1.0],
    #     "reward_normalization": False,
    #     "tau": 0.005,
    #     "sac_alpha": 0.2, # TODO: move sac arguments into a sac specific subcategory
    #     "auto_alpha": False,
    #     "alpha_lr": .0003,
    #     "deterministic_eval": False,
    #     "logging": {
    #         "log_interval": 5,
    #         "train_log_maxlen": 0,
    #         "test_log_maxlen": 0,
    #         "initial_trials": 10,
    #         "test_trials": 10,
    #         "max_terminate_step": [0,0]
    #     }
    # },
    # "hindsight": {
    #     "use_her": False,
    #     "resample_timer": -1,
    #     "select_positive": 0.5,
    #     "interaction_resample": False,
    #     "max_hindsight": -1,
    #     "early_stopping": False,
    #     "interaction_criteria": 0,
    #     "min_replay_len": -1,
    #     "num_param_samples": -1,
    # },
    # "inline": {
    #     "interaction_config": "",
    #     "inpolicy_iters": 5000,
    #     "inpolicy_schedule": -1,
    #     "inpolicy_times": -1,
    #     "policy_intrain_passive": False,
    #     "intrain_weighting": [-13, 1, 1, -1],
    #     "save_inline": False,
    #     "policy_inline_iters": [5, 1, 1000],
    #     "reset_weights": [0,0,0]
    # },
    # "testing": {
    #     "test_type": "",
    # },

    # Deep copy so that edits to full_args["network"] do not alter network_args.
    "network": copy.deepcopy(network_args),
}
